# plotly standard imports
import plotly.graph_objs as go
import chart_studio.plotly as py
# Cufflinks wrapper on plotly
import cufflinks
# Data science imports
import pandas as pd
import numpy as np
# Options for pandas
pd.options.display.max_columns = 30
# Display all cell outputs
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'
from plotly.offline import iplot, init_notebook_mode
# Render cufflinks/plotly charts inline in the notebook, without a chart-studio account
cufflinks.go_offline(connected=True)
init_notebook_mode(connected=True)
# Set global theme
cufflinks.set_config_file(world_readable=True, theme='pearl')
We will use the Bitcoin data from the Cryptocurrency Historical Prices dataset:
daily (1-day interval) records starting April 28, 2013.
# Load the raw Bitcoin price dataset via the project helper.
from src.load_datasets import load_input_dataset
input_dataset = load_input_dataset()
# Peek at the first rows to verify the expected columns are present.
input_dataset.head()
| SNo | Name | Symbol | Date | High | Low | Open | Close | Volume | Marketcap | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | Bitcoin | BTC | 2013-04-29 23:59:59 | 147.488007 | 134.000000 | 134.444000 | 144.539993 | 0.0 | 1.603769e+09 |
| 1 | 2 | Bitcoin | BTC | 2013-04-30 23:59:59 | 146.929993 | 134.050003 | 144.000000 | 139.000000 | 0.0 | 1.542813e+09 |
| 2 | 3 | Bitcoin | BTC | 2013-05-01 23:59:59 | 139.889999 | 107.720001 | 139.000000 | 116.989998 | 0.0 | 1.298955e+09 |
| 3 | 4 | Bitcoin | BTC | 2013-05-02 23:59:59 | 125.599998 | 92.281898 | 116.379997 | 105.209999 | 0.0 | 1.168517e+09 |
| 4 | 5 | Bitcoin | BTC | 2013-05-03 23:59:59 | 108.127998 | 79.099998 | 106.250000 | 97.750000 | 0.0 | 1.085995e+09 |
We will explore the full input dataset.
import sweetviz as sv
# Keep only the numeric market features; drop identifier columns (SNo, Name, Symbol, Date).
target_features = input_dataset[['High', 'Low', 'Open', 'Close', 'Volume', 'Marketcap']]
# Full-dataset EDA report, analysing 'Close' as the target feature.
analyse_report = sv.analyze([target_features, 'Bitcoin'], target_feat="Close")
analyse_report.show_notebook()
target_features.head()
| High | Low | Open | Close | Volume | Marketcap | |
|---|---|---|---|---|---|---|
| 0 | 147.488007 | 134.000000 | 134.444000 | 144.539993 | 0.0 | 1.603769e+09 |
| 1 | 146.929993 | 134.050003 | 144.000000 | 139.000000 | 0.0 | 1.542813e+09 |
| 2 | 139.889999 | 107.720001 | 139.000000 | 116.989998 | 0.0 | 1.298955e+09 |
| 3 | 125.599998 | 92.281898 | 116.379997 | 105.209999 | 0.0 | 1.168517e+09 |
| 4 | 108.127998 | 79.099998 | 106.250000 | 97.750000 | 0.0 | 1.085995e+09 |
Feature evolution over time
# Index the features by the parsed 'Date' column so plots get a time axis.
# Renamed the local from `datetime`: the original name shadowed the stdlib
# `datetime` module.
date_index = pd.to_datetime(input_dataset['Date'])
target_features.index = date_index
# One subplot per feature, plotted over time.
target_features.iplot(
    subplots=True,
)
# Summary statistics (count/mean/std/quantiles) per feature.
target_features.describe().transpose()
| count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|
| High | 2862.0 | 4.974040e+03 | 7.188837e+03 | 7.456110e+01 | 4.260478e+02 | 1.197335e+03 | 8.138047e+03 | 5.833057e+04 |
| Low | 2862.0 | 4.695103e+03 | 6.667198e+03 | 6.552600e+01 | 4.156758e+02 | 1.164175e+03 | 7.703358e+03 | 5.567261e+04 |
| Open | 2862.0 | 4.836307e+03 | 6.933573e+03 | 6.850500e+01 | 4.212045e+02 | 1.180100e+03 | 7.924612e+03 | 5.753274e+04 |
| Close | 2862.0 | 4.852093e+03 | 6.975106e+03 | 6.843100e+01 | 4.209892e+02 | 1.182810e+03 | 7.926697e+03 | 5.753994e+04 |
| Volume | 2862.0 | 8.978475e+09 | 1.658135e+10 | 0.000000e+00 | 2.786250e+07 | 3.301950e+08 | 1.296743e+10 | 3.509679e+11 |
| Marketcap | 2862.0 | 8.591622e+10 | 1.287414e+11 | 7.784112e+08 | 5.988997e+09 | 1.924238e+10 | 1.387658e+11 | 1.072263e+12 |
We will keep only the last 4 years, because they are the most interesting.
# Trailing window: keep only the last `years_count` years of daily rows.
year = 365  # days per year (leap days ignored)
years_count = 4
# Both operands are ints, so the product is already an int — the original
# `round()` call was redundant.
items_count = years_count * year
# Negative slicing takes the trailing rows and degrades gracefully if the
# dataset is shorter than the requested window.
last_years_dataset = input_dataset[-items_count:]
last_years_datetime = pd.to_datetime(last_years_dataset['Date'])
last_years_dataset.head()
len(last_years_dataset)
| SNo | Name | Symbol | Date | High | Low | Open | Close | Volume | Marketcap | |
|---|---|---|---|---|---|---|---|---|---|---|
| 1402 | 1403 | Bitcoin | BTC | 2017-03-01 23:59:59 | 1222.500000 | 1179.689941 | 1180.040039 | 1222.500000 | 229056992.0 | 1.979446e+10 |
| 1403 | 1404 | Bitcoin | BTC | 2017-03-02 23:59:59 | 1262.130005 | 1215.619995 | 1224.680054 | 1251.010010 | 368275008.0 | 2.025854e+10 |
| 1404 | 1405 | Bitcoin | BTC | 2017-03-03 23:59:59 | 1280.310059 | 1250.709961 | 1250.709961 | 1274.989990 | 315739008.0 | 2.064926e+10 |
| 1405 | 1406 | Bitcoin | BTC | 2017-03-04 23:59:59 | 1279.400024 | 1230.510010 | 1277.430054 | 1255.150024 | 183270000.0 | 2.032998e+10 |
| 1406 | 1407 | Bitcoin | BTC | 2017-03-05 23:59:59 | 1267.290039 | 1238.060059 | 1254.290039 | 1267.119995 | 134127000.0 | 2.052624e+10 |
1460
# Same feature subset as before, restricted to the trailing 4 years,
# indexed by date for the time-series plot below.
last_years_features = last_years_dataset[['High', 'Low', 'Open', 'Close', 'Volume', 'Marketcap']]
last_years_features.index = last_years_datetime
last_years_features.iplot(
subplots=True,
)
First, define a function to display the frequency spectrum.
import tensorflow as tf
import matplotlib.pyplot as plt

def plot_log_freaquency(series, days_per_year=365):
    """Plot the FFT magnitude of *series* on a log-scaled frequency axis.

    Frequencies are expressed in cycles per year, so a peak at x=1 indicates
    a yearly cycle and a peak at x=``days_per_year`` indicates a daily cycle.

    Args:
        series: 1-D numeric sequence sampled at one value per day.
        days_per_year: samples per year. Parameterized (default 365, the
            original hard-coded value) so the function also works for data
            sampled at other rates; existing callers are unaffected.

    NOTE(review): the function name keeps the original spelling
    ("freaquency") because calls elsewhere in the notebook rely on it.
    """
    fft = tf.signal.rfft(series)
    f_per_dataset = np.arange(0, len(fft))
    n_samples_d = len(series)
    years_per_dataset = n_samples_d / days_per_year
    f_per_year = f_per_dataset / years_per_dataset
    plt.step(f_per_year, np.abs(fft))
    plt.xscale('log')
    # Tick marks at the yearly and daily frequencies.
    plt.xticks([1, days_per_year], labels=['1/Year', '1/day'])
    _ = plt.xlabel('Frequency (log scale)')
Frequency of price
plot_log_freaquency(last_years_dataset['Close'])
Frequency of transaction volume
plot_log_freaquency(last_years_dataset['Volume'])
# Load the chronological train/test split prepared by the project helper.
from src.load_datasets import load_datasets
train_df, test_df = load_datasets()
train_df
| SNo | Name | Symbol | Date | High | Low | Open | Close | Volume | Marketcap | |
|---|---|---|---|---|---|---|---|---|---|---|
| 1402 | 1403 | Bitcoin | BTC | 2017-03-01 23:59:59 | 1222.500000 | 1179.689941 | 1180.040039 | 1222.500000 | 2.290570e+08 | 1.979446e+10 |
| 1403 | 1404 | Bitcoin | BTC | 2017-03-02 23:59:59 | 1262.130005 | 1215.619995 | 1224.680054 | 1251.010010 | 3.682750e+08 | 2.025854e+10 |
| 1404 | 1405 | Bitcoin | BTC | 2017-03-03 23:59:59 | 1280.310059 | 1250.709961 | 1250.709961 | 1274.989990 | 3.157390e+08 | 2.064926e+10 |
| 1405 | 1406 | Bitcoin | BTC | 2017-03-04 23:59:59 | 1279.400024 | 1230.510010 | 1277.430054 | 1255.150024 | 1.832700e+08 | 2.032998e+10 |
| 1406 | 1407 | Bitcoin | BTC | 2017-03-05 23:59:59 | 1267.290039 | 1238.060059 | 1254.290039 | 1267.119995 | 1.341270e+08 | 2.052624e+10 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2565 | 2566 | Bitcoin | BTC | 2020-05-07 23:59:59 | 9992.663921 | 9138.322572 | 9261.895058 | 9951.518745 | 6.111270e+10 | 1.827874e+11 |
| 2566 | 2567 | Bitcoin | BTC | 2020-05-08 23:59:59 | 9996.743353 | 9767.173070 | 9936.162482 | 9842.666368 | 5.178075e+10 | 1.808049e+11 |
| 2567 | 2568 | Bitcoin | BTC | 2020-05-09 23:59:59 | 9913.862993 | 9580.644492 | 9840.906384 | 9593.896734 | 4.656612e+10 | 1.762514e+11 |
| 2568 | 2569 | Bitcoin | BTC | 2020-05-10 23:59:59 | 9595.580629 | 8395.107451 | 9591.169231 | 8756.431142 | 6.332528e+10 | 1.608848e+11 |
| 2569 | 2570 | Bitcoin | BTC | 2020-05-11 23:59:59 | 9033.471176 | 8374.322975 | 8755.535639 | 8601.796202 | 5.711986e+10 | 1.580592e+11 |
1168 rows × 10 columns
import sweetviz as sv
feature_list = ['High', 'Low', 'Open', 'Close', 'Volume', 'Marketcap']
# .copy() so the later index assignments mutate frames we own rather than
# (potential) views of train_df/test_df — avoids pandas
# SettingWithCopyWarning and accidental aliasing.
train_features = train_df[feature_list].copy()
test_features = test_df[feature_list].copy()
# Side-by-side EDA comparison of the two splits, with 'Close' as the target.
compare_report = sv.compare([train_features, 'Train data'], [test_features, 'Test data'], "Close")
compare_report.show_notebook()
# Re-index both splits by their parsed 'Date' columns for time-axis plots.
train_datetime = pd.to_datetime(train_df['Date'])
test_datetime = pd.to_datetime(test_df['Date'])
train_features.index = train_datetime
test_features.index = test_datetime
train_features.iplot(subplots=True)
test_df
| SNo | Name | Symbol | Date | High | Low | Open | Close | Volume | Marketcap | |
|---|---|---|---|---|---|---|---|---|---|---|
| 2570 | 2571 | Bitcoin | BTC | 2020-05-12 23:59:59 | 8949.897979 | 8569.643988 | 8610.386212 | 8804.477811 | 4.214272e+10 | 1.617916e+11 |
| 2571 | 2572 | Bitcoin | BTC | 2020-05-13 23:59:59 | 9317.878554 | 8805.387813 | 8805.387813 | 9269.987706 | 4.555814e+10 | 1.703527e+11 |
| 2572 | 2573 | Bitcoin | BTC | 2020-05-14 23:59:59 | 9793.268209 | 9255.034846 | 9271.328638 | 9733.721471 | 5.642691e+10 | 1.788816e+11 |
| 2573 | 2574 | Bitcoin | BTC | 2020-05-15 23:59:59 | 9755.828498 | 9261.398000 | 9734.290775 | 9328.197226 | 4.815880e+10 | 1.714363e+11 |
| 2574 | 2575 | Bitcoin | BTC | 2020-05-16 23:59:59 | 9564.204989 | 9260.694163 | 9333.239873 | 9377.014026 | 3.616477e+10 | 1.723410e+11 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2857 | 2858 | Bitcoin | BTC | 2021-02-23 23:59:59 | 54204.929756 | 45290.590268 | 54204.929756 | 48824.426869 | 1.061025e+11 | 9.099259e+11 |
| 2858 | 2859 | Bitcoin | BTC | 2021-02-24 23:59:59 | 51290.136695 | 47213.498162 | 48835.087661 | 49705.333316 | 6.369552e+10 | 9.263931e+11 |
| 2859 | 2860 | Bitcoin | BTC | 2021-02-25 23:59:59 | 51948.966982 | 47093.853019 | 49709.082425 | 47093.853019 | 5.450657e+10 | 8.777661e+11 |
| 2860 | 2861 | Bitcoin | BTC | 2021-02-26 23:59:59 | 48370.785260 | 44454.842114 | 47180.464054 | 46339.760083 | 3.509679e+11 | 8.637523e+11 |
| 2861 | 2862 | Bitcoin | BTC | 2021-02-27 23:59:59 | 48253.270101 | 45269.025766 | 46344.772237 | 46188.451275 | 4.591095e+10 | 8.609781e+11 |
292 rows × 10 columns
test_features.iplot(subplots=True)
We will use only the training mean and standard deviation, so the neural network gets no access to the test dataset.
Subtracting the mean and dividing by the standard deviation of each feature gives the required normalisation.
# NOTE(review): MinMaxScaler is imported but never used — the z-score
# normalisation below is done manually; consider removing this import.
from sklearn.preprocessing import MinMaxScaler
# Use ONLY training-set statistics so no information leaks from the test set.
train_mean = train_features.mean()
train_std = train_features.std()
# Z-score both splits with the training statistics.
train_normalised = (train_features - train_mean) / train_std
test_normalised = (test_features - train_mean) / train_std
train_normalised.head()
train_normalised.index = train_features.index
train_normalised.iplot(subplots=True, title="Train")
test_normalised.index = test_features.index
test_normalised.iplot(subplots=True, title="Test")
| High | Low | Open | Close | Volume | Marketcap | |
|---|---|---|---|---|---|---|
| Date | ||||||
| 2017-03-01 23:59:59 | -1.700268 | -1.744181 | -1.720713 | -1.711503 | -0.967220 | -1.731125 |
| 2017-03-02 23:59:59 | -1.688553 | -1.732598 | -1.707019 | -1.702748 | -0.955925 | -1.722970 |
| 2017-03-03 23:59:59 | -1.683178 | -1.721287 | -1.699034 | -1.695383 | -0.960187 | -1.716104 |
| 2017-03-04 23:59:59 | -1.683447 | -1.727798 | -1.690837 | -1.701476 | -0.970934 | -1.721715 |
| 2017-03-05 23:59:59 | -1.687027 | -1.725364 | -1.697936 | -1.697800 | -0.974921 | -1.718266 |
# Side-by-side view of the raw vs. z-scored closing price.
close_comparison = pd.DataFrame(
    {
        'Real': train_features['Close'],
        'Normalised': train_normalised['Close'],
    }
)
close_comparison.index = train_features.index
close_comparison.iplot(subplots=True)
from src.prepare_datasets import get_prepared_datasets
from src.window_generator import WindowGenerator
# Reload the prepared (presumably already normalised — verify
# src/prepare_datasets) train/test splits.
train_df, test_df = get_prepared_datasets()
# 24 time steps of inputs; predict 'Close' one step, 24 steps after the
# input window ends (total window size 48).
w1 = WindowGenerator(
input_width=24, label_width=1, shift=24,
train_df=train_df, test_df=test_df,
label_columns=['Close']
)
w1
Total window size: 48 Input indices: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23] Label indices: [47] Label column name(s): ['Close']
# Visualise a sample window: inputs and labels for the 'Close' column.
w1.plot(plot_col='Close')
# Inspect the tf.data element spec — per the output below, inputs are
# (batch, 24, 4) and labels are (batch, 1, 1).
w1.train.element_spec
(TensorSpec(shape=(None, 24, 4), dtype=tf.float32, name=None), TensorSpec(shape=(None, 1, 1), dtype=tf.float32, name=None))
# Degenerate window: one step of input, predict the next step's 'Close'.
single_step_window = WindowGenerator(
input_width=1, label_width=1, shift=1,
train_df=train_df, test_df=test_df,
label_columns=['Close'])
single_step_window
Total window size: 2 Input indices: [0] Label indices: [1] Label column name(s): ['Close']
import tensorflow as tf
from src.BaselineModel import Baseline
# Map column name -> positional index; models select columns by position.
column_indices = {name: i for i, name in enumerate(train_df.columns)}
# Baseline model for the 'Close' column — presumably echoes the current
# value as the prediction; verify against src/BaselineModel.
baseline = Baseline(label_index=column_indices['Close'])
baseline.compile(
loss=tf.losses.MeanSquaredError(),
metrics=[tf.metrics.MeanAbsoluteError()]
)
# Collect evaluation results keyed by model name for later comparison.
performance = {}
performance['Baseline'] = baseline.evaluate(single_step_window.test, verbose=1)
10/10 [==============================] - 0s 2ms/step - loss: 0.0802 - mean_absolute_error: 0.1478
# Wider window (30 steps in and out, shift 1) so model predictions can be
# plotted alongside a full sequence of labels.
wide_window = WindowGenerator(
input_width=30, label_width=30, shift=1,
train_df=train_df, test_df=test_df,
label_columns=['Close'])
wide_window
Total window size: 31 Input indices: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29] Label indices: [ 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30] Label column name(s): ['Close']
# Sanity-check the baseline's shapes on one example batch — per the output
# below: (32, 30, 4) inputs -> (32, 30, 1) predictions.
print('Input shape:', wide_window.example[0].shape)
print('Output shape:', baseline(wide_window.example[0]).shape)
Input shape: (32, 30, 4) Output shape: (32, 30, 1)
wide_window.plot(baseline)
# Load the trained model (per the summary below: LSTM(32) + Dense(1)).
from src.libs import load
model = load()
Model: "sequential" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= lstm (LSTM) (None, 30, 32) 4736 _________________________________________________________________ dense (Dense) (None, 30, 1) 33 ================================================================= Total params: 4,769 Trainable params: 4,769 Non-trainable params: 0 _________________________________________________________________
Try to plot the model predictions.
wide_window.plot(model)
--------------------------------------------------------------------------- InternalError Traceback (most recent call last) <ipython-input-30-8f8cbc2b183f> in <module> ----> 1 wide_window.plot(model) /work/src/window_generator/plot.py in plot(self, model, plot_col, max_subplots) 23 edgecolors='k', label='Labels', c='#2ca02c', s=64) 24 if model is not None: ---> 25 predictions = model(inputs) 26 plt.scatter(self.label_indices, predictions[n, :, label_col_index], 27 marker='X', edgecolors='k', label='Predictions', /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, *args, **kwargs) 1010 with autocast_variable.enable_auto_cast_variables( 1011 self._compute_dtype_object): -> 1012 outputs = call_fn(inputs, *args, **kwargs) 1013 1014 if self._activity_regularizer: /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/sequential.py in call(self, inputs, training, mask) 373 if not self.built: 374 self._init_graph_network(self.inputs, self.outputs) --> 375 return super(Sequential, self).call(inputs, training=training, mask=mask) 376 377 outputs = inputs # handle the corner case where self.layers is empty /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/functional.py in call(self, inputs, training, mask) 423 """ 424 return self._run_internal_graph( --> 425 inputs, training=training, mask=mask) 426 427 def compute_output_shape(self, input_shape): /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/functional.py in _run_internal_graph(self, inputs, training, mask) 558 559 args, kwargs = node.map_arguments(tensor_dict) --> 560 outputs = node.layer(*args, **kwargs) 561 562 # Update tensor_dict. 
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, *args, **kwargs) 1010 with autocast_variable.enable_auto_cast_variables( 1011 self._compute_dtype_object): -> 1012 outputs = call_fn(inputs, *args, **kwargs) 1013 1014 if self._activity_regularizer: /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/layers/core.py in call(self, inputs) 1210 self.bias, 1211 self.activation, -> 1212 dtype=self._compute_dtype_object) 1213 1214 def compute_output_shape(self, input_shape): /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/layers/ops/core.py in dense(inputs, kernel, bias, activation, dtype) 54 # Broadcast kernel to inputs. 55 else: ---> 56 outputs = standard_ops.tensordot(inputs, kernel, [[rank - 1], [0]]) 57 # Reshape the output back to the original ndim of the input. 58 if not context.executing_eagerly(): /usr/local/lib/python3.6/dist-packages/tensorflow/python/util/dispatch.py in wrapper(*args, **kwargs) 199 """Call target, and fall back on dispatchers if there is a TypeError.""" 200 try: --> 201 return target(*args, **kwargs) 202 except (TypeError, ValueError): 203 # Note: convert_to_eager_tensor currently raises a ValueError, not a /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_ops.py in tensordot(a, b, axes, name) 4618 b_reshape, b_free_dims, b_free_dims_static = _tensordot_reshape( 4619 b, b_axes, True) -> 4620 ab_matmul = matmul(a_reshape, b_reshape) 4621 if isinstance(a_free_dims, list) and isinstance(b_free_dims, list): 4622 if (ab_matmul.get_shape().is_fully_defined() and /usr/local/lib/python3.6/dist-packages/tensorflow/python/util/dispatch.py in wrapper(*args, **kwargs) 199 """Call target, and fall back on dispatchers if there is a TypeError.""" 200 try: --> 201 return target(*args, **kwargs) 202 except (TypeError, ValueError): 203 # Note: convert_to_eager_tensor currently raises a ValueError, not a 
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_ops.py in matmul(a, b, transpose_a, transpose_b, adjoint_a, adjoint_b, a_is_sparse, b_is_sparse, name) 3313 else: 3314 return gen_math_ops.mat_mul( -> 3315 a, b, transpose_a=transpose_a, transpose_b=transpose_b, name=name) 3316 3317 /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/gen_math_ops.py in mat_mul(a, b, transpose_a, transpose_b, name) 5530 return _result 5531 except _core._NotOkStatusException as e: -> 5532 _ops.raise_from_not_ok_status(e, name) 5533 except _core._FallbackException: 5534 pass /usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py in raise_from_not_ok_status(e, name) 6860 message = e.message + (" name: " + name if name is not None else "") 6861 # pylint: disable=protected-access -> 6862 six.raise_from(core._status_to_exception(e.code, message), None) 6863 # pylint: enable=protected-access 6864 /usr/local/lib/python3.6/dist-packages/six.py in raise_from(value, from_value) InternalError: Blas GEMV launch failed: m=32, n=960 [Op:MatMul]
# Multi-step setting: 30 days of history in, the next 30 days out.
OUT_STEPS = 30
multi_window = WindowGenerator(
    input_width=30,
    label_width=OUT_STEPS,
    shift=OUT_STEPS,
    train_df=train_df,
    test_df=test_df,
    label_columns=['Close'],
)
multi_window
import tensorflow as tf
from src.RepeatBaselineModel import RepeatBaseline
# Multi-step baseline — presumably repeats the input window as the
# prediction; verify against src/RepeatBaselineModel.
repeat_baseline = RepeatBaseline()
repeat_baseline.compile(loss=tf.losses.MeanSquaredError(),
metrics=[tf.metrics.MeanAbsoluteError()])
# Evaluate on the multi-step test windows and plot predictions vs. labels.
repeat_baseline.evaluate(multi_window.test, verbose=1)
multi_window.plot(repeat_baseline)
# Per-epoch metrics logged during model training.
df = pd.read_csv('./metrics/training.csv')
df.head()
| epoch | loss | mean_absolute_error | val_loss | val_mean_absolute_error | |
|---|---|---|---|---|---|
| 0 | 0 | 0.450651 | 0.469097 | 15.367444 | 2.529698 |
| 1 | 1 | 0.374250 | 0.424918 | 13.981587 | 2.414152 |
| 2 | 2 | 0.356964 | 0.417801 | 13.627735 | 2.383838 |
| 3 | 3 | 0.350333 | 0.415161 | 14.051004 | 2.474146 |
| 4 | 4 | 0.345154 | 0.413286 | 13.788432 | 2.447649 |
# Training vs. validation loss per epoch.
df[['epoch', 'loss', 'val_loss']].iplot(
x='epoch',
mode='lines+markers',
xTitle='epoch',
yTitle='loss',
title='Training loss',
linecolor='black',
)
# Training vs. validation mean absolute error per epoch.
df[['epoch', 'mean_absolute_error', 'val_mean_absolute_error']].iplot(
x='epoch',
mode='lines+markers',
xTitle='epoch',
yTitle='mean_absolute_error',
title='mean_absolute_error'
)